Now that the database is built, we need a lookup table that tells us the most likely population size given that we have made n draws and observed m unique individuals. We will create a query for the (n, m) lookup and run it iteratively over all pairs.
In [16]:
import sqlite3
import numpy as np
In [17]:
# Open the simulation database built in the previous notebook section.
# `conn` and its cursor `c` are reused by every subsequent query cell.
conn = sqlite3.connect('ICCP-database.sqlite')
c = conn.cursor()
In [20]:
# Build the lookup table: for every (draws, uniques) pair, record the most
# likely population size(s) n, that estimate's likelihood, and its raw count.
# Keys are (n_draws, m_uniques), 1-based; values are
# (array of best n, likelihood, count-of-best).
outputdict = {}
for n in range(110):
    for m in range(n + 1):
        # Simulations (grouped by population size sim.n) that showed exactly
        # m+1 uniques at draw n+1.  Use `?` parameter binding instead of
        # str.format — idiomatic sqlite3 and lets the plan be cached.
        c.execute(
            'select count(draws.sim_num), sim.n from draws '
            'inner join sim on sim.sim_num=draws.sim_num '
            'where draws.draw_num=? and uniques=? group by n',
            (n + 1, m + 1))
        per_draw = c.fetchall()
        # Simulations that stopped recording before draw n (saturated early)
        # but whose population is still compatible with m uniques.
        # NOTE(review): assumes max_draw marks the last recorded draw — confirm.
        c.execute(
            'select count(sim_num), n from sim '
            'where max_draw<? and n>=? group by n;',
            (n, m))
        saturated = c.fetchall()
        per_draw = np.asarray(per_draw)
        saturated = np.asarray(saturated)
        # Empty fetchall() produces a (0,)-shaped array; normalize to (0, 2)
        # so vstack and column indexing below keep working.
        if np.size(per_draw) == 0:
            per_draw = np.reshape(per_draw, (0, 2))
        if np.size(saturated) == 0:
            saturated = np.reshape(saturated, (0, 2))
        combined = np.vstack((per_draw, saturated))
        # Merge the two count sources: total count per population size.
        totals = []
        for size in np.unique(combined[:, 1]):
            totals.append((size, sum(combined[combined[:, 1] == size, 0])))
        totals = np.asarray(totals)
        if np.size(totals) == 0:
            # No simulation ever produced this (draws, uniques) combination.
            outputdict[n + 1, m + 1] = (np.array([0]), np.array([0]), 0)
        else:
            best_count = max(totals[:, 1])
            max_n = totals[totals[:, 1] == best_count, 0]
            # Explicit float() guards against integer truncation if the
            # counts come back as plain ints (Python 2 legacy hazard).
            likelihood = best_count / float(sum(totals[:, 1]))
            outputdict[n + 1, m + 1] = (max_n.flatten(), likelihood, best_count)
In [14]:
# Flush any pending writes and release the database handle.
conn.commit()
conn.close()